Loading required libraries
(library(ggplot2))
(library(tidyr))
(library(dplyr))
(library(readxl))
(library(gridExtra))
(library(plotly))
Getting the complete raw data
# Read the raw player table from data/Fifa19C.csv, keeping text columns as
# character vectors. `FALSE` is spelled out because `F` is an ordinary variable
# that can be reassigned.
raw.data <- read.csv("data/Fifa19C.csv", stringsAsFactors = FALSE)
Selecting only the required columns from the complete dataframe
# Keep only the listed columns of the raw table, in this order.
data <- select(
  raw.data,
  ID, Name, Age, Nationality,
  Overall, Potential,
  Club, Value, Wage,
  Preferred.Foot, International.Reputation, Weak.Foot, Skill.Moves,
  Position, Jersey.Number,
  Height, Weight,
  Release.Clause
)
Converting categorical values to factors
# Convert currency strings such as "€110.5M", "€565K" or "1,200" to numbers.
#
# vector: character vector (or anything as.character() accepts) holding amounts
#         with an optional leading currency symbol, optional thousands commas
#         and an optional "K" (thousands) or "M" (millions) marker.
# Returns a numeric vector of the same length; blank, NA or otherwise
# unparseable entries come back as NA.
toNumberCurrency <- function(vector) {
  cleaned <- as.character(vector)
  # Drop whatever precedes the number (currency symbol, stray bytes from a
  # mis-encoded symbol, spaces). Matching on bytes keeps this working even when
  # the symbol was damaged by an encoding mismatch.
  cleaned <- sub("^[^0-9.+-]+", "", cleaned, useBytes = TRUE)
  cleaned <- gsub(",", "", cleaned, fixed = TRUE)

  # Work out the scale first, so the magnitude marker can be removed before
  # parsing and as.numeric() never sees "K"/"M" (which used to trigger
  # "NAs introduced by coercion" warnings).
  multiplier <- rep(1, length(cleaned))
  multiplier[grepl("K", cleaned, fixed = TRUE)] <- 1000
  multiplier[grepl("M", cleaned, fixed = TRUE)] <- 1000000

  result <- as.numeric(gsub("[KM]", "", cleaned)) * multiplier
  return(result)
}
# Parse the three currency columns (Wage, Value, Release.Clause, in that order)
# into plain numbers, replacing the text columns in place.
# Captured notebook output: each of the three conversions emitted the warning
# "NAs introduced by coercion".
data[c("Wage", "Value", "Release.Clause")] <- lapply(
  data[c("Wage", "Value", "Release.Clause")],
  toNumberCurrency
)
Converting height to centimeters
# Convert heights written as feet'inches (e.g. "5'7") to centimetres.
# 1 ft = 30.48 cm and 1 in = 2.54 cm; a feet factor of 30.4 would under-report
# every height by 0.08 cm per foot.
l <- strsplit(data$Height, "'", fixed = TRUE)
data$Height <- vapply(
  l,
  function(x) {
    # Blank, missing or malformed entries do not split into exactly a feet part
    # and an inches part; report them as NA rather than a misleading 0 cm.
    if (length(x) != 2L) {
      return(NA_real_)
    }
    sum(as.numeric(x) * c(30.48, 2.54))
  },
  numeric(1)
)
Removing lbs from the weight column
# Strip every letter and space from the weight text (this removes the unit
# suffix) and parse what is left as a number, in a single step.
data$Weight <- as.numeric(gsub("[a-zA-Z ]", "", data$Weight))
Replace specific positions of players with more general positions
# Collapse side-specific positions onto their general role. Any position not
# listed below (including a missing one) is left exactly as it is.
data$Position <- case_when(
  data$Position %in% c("RS", "LS") ~ "ST",
  data$Position %in% c("LF") ~ "LW",
  data$Position %in% c("RF") ~ "RW",
  data$Position %in% c("RCM", "LCM") ~ "CM",
  data$Position %in% c("RCB", "LCB") ~ "CB",
  data$Position %in% c("LDM", "RDM") ~ "CDM",
  data$Position %in% c("LAM", "RAM") ~ "CAM",
  TRUE ~ data$Position
)
# Show the first rows to check the result.
head(data)
Total players based on position
Getting top players using order
# Rank every player by overall rating, best first, and keep the 25 highest.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
top.players <- data[order(data$Overall, decreasing = TRUE), ]
top.25 <- head(top.players, 25)
Jersey numbers for the top 25 players
# Bar chart counting the top 25 players per jersey number, shown as an
# interactive plotly widget.
pl <- ggplot(data = top.25, mapping = aes(x = Jersey.Number)) +
  geom_bar() +
  theme_minimal()
ggplotly(pl)
Ages of top 25 players
# Histogram of the top 25 players' ages with one bin per year of age, shown as
# an interactive plotly widget.
pl <- ggplot(data = top.25, mapping = aes(x = Age)) +
  geom_histogram(binwidth = 1) +
  theme_minimal()
ggplotly(pl)
Top 25 based on countries
Top 25 by position
# Stacked bar chart of the top 25 players per position, with one coloured
# segment per player name, shown as an interactive plotly widget.
pl <- ggplot(data = top.25, mapping = aes(x = Position, fill = Name)) +
  geom_bar() +
  theme_minimal()
ggplotly(pl)
Getting the top potential players with age < 25
# `top.potential` must exist before it can be truncated: players aged 25 or
# under, ordered by potential rating with the highest first.
top.potential <- data %>%
  filter(Age <= 25) %>%
  arrange(desc(Potential))
# Keep the 50 players with the highest potential.
top.50.potential <- head(top.potential, 50)
Nationalities of top 50 potential players
# Stacked bar chart of the top 50 potential players per nationality, with one
# coloured segment per player name; x-axis labels are tilted 45 degrees.
pl <- ggplot(data = top.50.potential, mapping = aes(x = Nationality)) +
  geom_bar(mapping = aes(fill = Name)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(pl)
# Stack both selections row-wise into one data frame; rbind() does not remove
# duplicates, so a player present in both selections appears twice.
top.25.and.top.50.potential <- rbind(top.25, top.50.potential)
Top 50 players based on Club
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpMb2FkaW5nIHJlcXVpZXJkIGxpYnJhcmllcw0KYGBge3J9DQoobGlicmFyeShnZ3Bsb3QyKSkNCihsaWJyYXJ5KHRpZHlyKSkNCihsaWJyYXJ5KGRwbHlyKSkNCihsaWJyYXJ5KHJlYWR4bCkpDQoobGlicmFyeShncmlkRXh0cmEpKQ0KKGxpYnJhcnkocGxvdGx5KSkNCmBgYA0KDQpHZXR0aW5nIHRoZSBjb21wbGV0ZSByYXcgZGF0YQ0KYGBge3J9DQpyYXcuZGF0YSA8LSByZWFkLmNzdigiZGF0YS9GaWZhMTlDLmNzdiIsc3RyaW5nc0FzRmFjdG9ycyA9IEYpDQpgYGANCg0KDQpTZWxlY3Rpbmcgb25seSB0aGUgcmVxdWlyZWQgY29sdW1ucyBmcm9tIHRoZSBjb21wbGV0ZSBkYXRhZnJhbWUNCmBgYHtyfQ0KZGF0YSA8LSByYXcuZGF0YSAlPiUgc2VsZWN0KElELE5hbWUsQWdlLE5hdGlvbmFsaXR5LE92ZXJhbGwsUG90ZW50aWFsLENsdWIsVmFsdWUsV2FnZSxQcmVmZXJyZWQuRm9vdCxJbnRlcm5hdGlvbmFsLlJlcHV0YXRpb24sV2Vhay5Gb290LFNraWxsLk1vdmVzLFBvc2l0aW9uLEplcnNleS5OdW1iZXIsSGVpZ2h0LFdlaWdodCxSZWxlYXNlLkNsYXVzZSkNCmBgYA0KDQoNCkNvbnZlcnRpbmcgY2F0ZWdvcmljYWwgdmFsdWVzIHRvIGZhY3RvcnMNCmBgYHtyfQ0KZGF0YSRJbnRlcm5hdGlvbmFsLlJlcHV0YXRpb24gPC0gYXMuZmFjdG9yKGRhdGEkSW50ZXJuYXRpb25hbC5SZXB1dGF0aW9uKQ0KZGF0YSRXZWFrLkZvb3QgPC0gYXMuZmFjdG9yKGRhdGEkV2Vhay5Gb290KQ0KZGF0YSRTa2lsbC5Nb3ZlcyA8LSBhcy5mYWN0b3IoZGF0YSRTa2lsbC5Nb3ZlcykNCmRhdGEkSmVyc2V5Lk51bWJlciA8LSBhcy5mYWN0b3IoZGF0YSRKZXJzZXkuTnVtYmVyKQ0KDQoNCmhlYWQoZGF0YSkNCmBgYA0KDQoNCmBgYHtyfQ0KdG9OdW1iZXJDdXJyZW5jeSA8LSBmdW5jdGlvbih2ZWN0b3IpIHsNCiAgICB2ZWN0b3IgPC0gYXMuY2hhcmFjdGVyKHZlY3RvcikNCiAgICAjcmVwbGFjZSAnPz8/JyB3aXRoICdFdXJvIHNpZ24nDQogICAgdmVjdG9yIDwtIGdzdWIoIig/Pz98LCkiLCIiLCB2ZWN0b3IpIA0KICAgIHJlc3VsdCA8LSBhcy5udW1lcmljKHZlY3RvcikNCiAgIA0KICAgIGtfcG9zaXRpb25zIDwtIGdyZXAoIksiLCB2ZWN0b3IpDQogICAgcmVzdWx0W2tfcG9zaXRpb25zXSA8LSBhcy5udW1lcmljKGdzdWIoIksiLCIiLHZlY3RvcltrX3Bvc2l0aW9uc10pKSAqIDEwMDANCiAgDQogICAgbV9wb3NpdGlvbnMgPC0gZ3JlcCgiTSIsIHZlY3RvcikNCiAgICByZXN1bHRbbV9wb3NpdGlvbnNdIDwtIGFzLm51bWVyaWMoZ3N1YigiTSIsIiIsdmVjdG9yW21fcG9zaXRpb25zXSkpICogMTAwMDAwMA0KICAgIA0KICAgIHJldHVybihyZXN1bHQpDQp9DQpkYXRhJFdhZ2UgPC0gdG9OdW1iZXJDdXJyZW5jeShkYXRhJFdhZ2UpIA0KZGF0YSRWYWx1ZSA8LSB0b051bWJlckN1cnJlbmN5KGRhdGEkVmFsdWUpDQpkYXRhJFJlbGVhc2UuQ2xhdXNlIDwtIHRvTnVtYmVy
Q3VycmVuY3koZGF0YSRSZWxlYXNlLkNsYXVzZSkNCmBgYA0KDQpgYGB7cn0NCmhlYWQoZGF0YSkNCmBgYA0KDQoNCkNvbnZlcnRpbmcgaGVpZ2h0IHRvIGNlbnRpbWV0ZXJzDQpgYGB7cn0NCmwgPC0gc3Ryc3BsaXQoZGF0YSRIZWlnaHQsIiciLGZpeGVkID0gVCkNCmRhdGEkSGVpZ2h0IDwtIHNhcHBseShsLCBmdW5jdGlvbih4KSBzdW0oYXMubnVtZXJpYyh4KSpjKDMwLjQsMi41NCkpKQ0KYGBgDQoNClJlbW92aW5nIGxicyBmcm9tIHRoZSB3ZWlnaHQgY29sdW1uDQpgYGB7cn0NCmRhdGEkV2VpZ2h0IDwtIGdzdWIoIlthLXpBLVogXSIsICIiLCBkYXRhJFdlaWdodCkNCmRhdGEkV2VpZ2h0IDwtIGFzLm51bWVyaWMoZGF0YSRXZWlnaHQpDQpgYGANCg0KUmVwbGFjZSBzcGVjZWZpYyBwb3NpdGlvbnMgb2YgcGxheWVycyB3aXRoIG1vcmUgZ2VuZXJhbCBwb3NpdGlvbnMNCmBgYHtyfQ0KZGF0YSRQb3NpdGlvbiA8LSBpZl9lbHNlKGRhdGEkUG9zaXRpb249PSJTVCIgfCBkYXRhJFBvc2l0aW9uPT0iUlMiIHwgZGF0YSRQb3NpdGlvbj09IkxTIiwiU1QiLGRhdGEkUG9zaXRpb24pDQpkYXRhJFBvc2l0aW9uIDwtIGlmX2Vsc2UoZGF0YSRQb3NpdGlvbj09IkxGIiB8IGRhdGEkUG9zaXRpb249PSJMVyIsIkxXIixkYXRhJFBvc2l0aW9uKQ0KZGF0YSRQb3NpdGlvbiA8LSBpZl9lbHNlKGRhdGEkUG9zaXRpb249PSJSRiIgfCBkYXRhJFBvc2l0aW9uPT0iUlciLCJSVyIsZGF0YSRQb3NpdGlvbikNCmRhdGEkUG9zaXRpb24gPC0gaWZfZWxzZShkYXRhJFBvc2l0aW9uPT0iQ00iIHwgZGF0YSRQb3NpdGlvbj09IlJDTSIgfCBkYXRhJFBvc2l0aW9uPT0iTENNIiwiQ00iLGRhdGEkUG9zaXRpb24pDQpkYXRhJFBvc2l0aW9uIDwtIGlmX2Vsc2UoZGF0YSRQb3NpdGlvbj09IkNCIiB8IGRhdGEkUG9zaXRpb249PSJSQ0IiIHwgZGF0YSRQb3NpdGlvbj09IkxDQiIsIkNCIixkYXRhJFBvc2l0aW9uKQ0KZGF0YSRQb3NpdGlvbiA8LSBpZl9lbHNlKGRhdGEkUG9zaXRpb249PSJDRE0iIHwgZGF0YSRQb3NpdGlvbj09IkxETSIgfCBkYXRhJFBvc2l0aW9uPT0iUkRNIiwiQ0RNIixkYXRhJFBvc2l0aW9uKQ0KZGF0YSRQb3NpdGlvbiA8LSBpZl9lbHNlKGRhdGEkUG9zaXRpb249PSJDQU0iIHwgZGF0YSRQb3NpdGlvbj09IkxBTSIgfCBkYXRhJFBvc2l0aW9uPT0iUkFNIiwiQ0FNIixkYXRhJFBvc2l0aW9uKQ0KaGVhZChkYXRhKQ0KYGBgDQoNClRvdGFsIHBsYXllcnMgYmFzZWQgb24gcG9zaXRpb24NCmBgYHtyfQ0KcGwgPC0gZ2dwbG90KGRhdGEsYWVzKHggPSBQb3NpdGlvbikpICsgZ2VvbV9iYXIoKSArIHRoZW1lX21pbmltYWwoKSANCmdncGxvdGx5KHBsKQ0KYGBgDQoNCkdldHRpbmcgdG9wIHBsYXllcnMgdXNpbmcgb3JkZXINCmBgYHtyfQ0KdG9wLnBsYXllcnMgPC0gZGF0YVtvcmRlcihkYXRhJE92ZXJhbGwsZGVjcmVhc2luZyA9IFQpLF0NCmBgYA0KDQpgYGB7cn0NCnRvcC4yNSA8LSBoZWFkKHRvcC5wbGF5ZXJzLDI1KQ0KYGBgDQoNCkplcnNleSBudW1i
ZXJzIGZvciB0aGUgdG9wIDI1IHBsYXllcnMNCmBgYHtyfQ0KcGwgPC0gZ2dwbG90KHRvcC4yNSxhZXMoSmVyc2V5Lk51bWJlcikpICsgZ2VvbV9iYXIoKSArIHRoZW1lX21pbmltYWwoKSANCmdncGxvdGx5KHBsKQ0KYGBgDQoNCkFnZXMgb2YgdG9wIDI1IHBsYXllcnMNCmBgYHtyfQ0KcGwgPC0gZ2dwbG90KHRvcC4yNSxhZXMoQWdlKSkgKyBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEpICsgdGhlbWVfbWluaW1hbCgpIA0KZ2dwbG90bHkocGwpDQpgYGANCg0KVG9wIDI1IGJhc2VkIG9uIGNvdW50cmllcw0KYGBge3J9DQpwbCA8LSBnZ3Bsb3QodG9wLjI1LGFlcyhOYXRpb25hbGl0eSkpICsgZ2VvbV9iYXIoKSArIHRoZW1lX21pbmltYWwoKSArIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNDUsaGp1c3QgPSAxKSkNCmdncGxvdGx5KHBsKQ0KYGBgDQoNClRvcCAyNSBieSBwb3NpdGlvbg0KYGBge3J9DQpwbCA8LSBnZ3Bsb3QodG9wLjI1LGFlcyh4ID0gUG9zaXRpb24sZmlsbCA9IE5hbWUpKSArIGdlb21fYmFyKCkgKyB0aGVtZV9taW5pbWFsKCkgDQpnZ3Bsb3RseShwbCkNCmBgYA0KDQpHZXR0aW5nIHRoZSB0b3AgcG90ZW50aWFsIHBsYXllcnMgd2l0aCBhZ2UgPCAyNSANCmBgYHtyfQ0KdG9wLnBvdGVudGlhbCA8LSBkYXRhICU+JSBmaWx0ZXIoQWdlPD0yNSkgJT4lIGFycmFuZ2UoZGVzYyhQb3RlbnRpYWwpKQ0KaGVhZCh0b3AucG90ZW50aWFsKQ0KYGBgDQoNCmBgYHtyfQ0KdG9wLjUwLnBvdGVudGlhbCA8LSBoZWFkKHRvcC5wb3RlbnRpYWwsNTApDQpgYGANCg0KQ2x1YnMgb2YgdG9wIDUwIHBvdGVudGlhbCBwbGF5ZXJzDQpgYGB7cn0NCnBsIDwtIGdncGxvdCh0b3AuNTAucG90ZW50aWFsLGFlcyh4PU5hdGlvbmFsaXR5KSkgKyBnZW9tX2JhcihhZXMoZmlsbD1OYW1lKSkgKyB0aGVtZV9taW5pbWFsKCkgKyB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQ1LGhqdXN0ID0gMSkpDQpnZ3Bsb3RseShwbCkNCmBgYA0KDQoNCmBgYHtyfQ0KdGVtcCA8LSB0b3AuNTAucG90ZW50aWFsW29yZGVyKHRvcC41MC5wb3RlbnRpYWwkQWdlLHRvcC41MC5wb3RlbnRpYWwkVmFsdWUsLXRvcC41MC5wb3RlbnRpYWwkUG90ZW50aWFsLC10b3AuNTAucG90ZW50aWFsJE92ZXJhbGwpLF0NCmhlYWQodGVtcCkNCmBgYA0KDQpgYGB7cn0NCnRvcC4yNS5hbmQudG9wLjUwLnBvdGVudGlhbCA8LSByYmluZCh0b3AuMjUsdG9wLjUwLnBvdGVudGlhbCkNCmBgYA0KDQpUb3AgNTAgcGxheWVycyBiYXNlZCBvbiBDbHViDQpgYGB7cn0NCnBsIDwtIGdncGxvdCh0b3AuNTAucG90ZW50aWFsLGFlcyh4PUNsdWIpKSArIGdlb21fYmFyKCkgKyB0aGVtZV9taW5pbWFsKCkgKyB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQ1LGhqdXN0ID0gMSkpDQpnZ3Bsb3RseShwbCkNCmBgYA0KDQoNCg0KDQoNCg==